Analysis date: 2021-03-30

Setup

Load libraries

library(plyr)
library(gtools)
library(pheatmap)
library(Matrix)
library(Hmisc)
library(ggpubr)
library(ggrepel)
library(DESeq2)
library(tidyverse)
library(vsn)
library(fdrtool)
library(limma)
library(apeglm)
library(MultiAssayExperiment)
library(gplots)
library(matrixStats)
library(DEqMS)

Load data

source("data/Figure_layouts.R")
load("data/CLL_Proteomics_Setup.RData")
load("data/CLL_Proteomics_ConsensusClustering.RData")
colData(multiomics_MAE)$PG <- as.factor(CCP_group5[rownames(colData(multiomics_MAE))])
colData(multiomics_MAE)$CCP6_RNA <- as.factor(CCP_group6_RNA[rownames(colData(multiomics_MAE))])

Limma Proteomics and genetic alterations

Functions

Limma + DEQMS function

Calculate_Limma <- function(assay_data, mut){
  # prepare assay data
  assay_data[is.infinite((assay_data))] <- NA
  is.na(assay_data) <- NA

  stopifnot(mut %in% colnames(proteomics_tbl_meta_biomart))
  # prepare row data
  row_data <- left_join((rownames(assay_data) %>% enframe(value = "rowname")),
              (proteomics_tbl_meta_biomart %>% 
                dplyr::select(rowname, start_position, end_position, chromosome_name,  mean_position) %>% unique %>%
                 group_by(rowname) %>% 
                 dplyr::slice(1) %>% ungroup() ),
              by=c("rowname")) %>% dplyr::select(-name) %>% as.data.frame()
  rownames(row_data) <- row_data$rowname
  
  stopifnot(all(rownames(assay_data)==rownames(row_data) ))

  # prepare column data
  col_data <- left_join((colnames(assay_data) %>% enframe(value = "colname")),
            (proteomics_tbl_meta_biomart %>% dplyr::select(colname, mut) %>% unique() ),
            by=c("colname") ) %>% dplyr::select(-name) %>% as.data.frame()
  rownames(col_data) <- col_data$colname
  
  stopifnot(all(colnames(assay_data)==rownames(col_data) ))
  if( (length(unique(col_data[,mut])) ==2) & !is.logical(col_data[,mut]) & is.numeric(col_data[,mut]) ){
    col_data[,mut] <- as.logical(col_data[,mut] == max(col_data[,mut]) )
  }
  
  #Creating an expression set
  raw_dataE <- ExpressionSet(assayData = assay_data,
                             phenoData = AnnotatedDataFrame(col_data),
                             featureData = AnnotatedDataFrame(row_data))
  validObject(raw_dataE)
  if(!is.logical(pData(raw_dataE)[,colnames(pData(raw_dataE))==mut]))stop("The mutation you want to look at is not logical")
  raw_dataE
  
  # Limma
  limma_data <- raw_dataE
  comparison <- c("mut - wt")
  
  colnames(pData(limma_data))[colnames(pData(limma_data)) == mut] <- "condition"
  pData(limma_data) <- mutate(pData(limma_data),condition= if_else(condition, "mut","wt") )
  limma_data <- limma_data[, !is.na(pData(limma_data)$cond)]
  limma.cond <- factor(pData(limma_data)$condition, ordered = FALSE)
  
  contrast.matrix <- model.matrix( ~ 0 + limma.cond)
  colnames(contrast.matrix) <- gsub("limma.cond", "", colnames(contrast.matrix))
  
  limma.object <- eBayes(
    contrasts.fit(
      lmFit(limma_data, design = contrast.matrix),
      makeContrasts(contrasts = comparison, levels = contrast.matrix)
      )
    )  

  ##### DEQMS
  tmp <- metadata(multiomics_MAE)$protein_description %>% dplyr::select(`Gene Name`, MinPSMQuant) %>% 
    mutate(MinPSMQuant= as.numeric(MinPSMQuant))
  psm.count.table <- tmp[,2] %>% as.data.frame()
  rownames(psm.count.table) <- tmp$`Gene Name`
  
  limma.object$count = psm.count.table[rownames(limma.object$coefficients),"MinPSMQuant"]
  limma_DEQMS = spectraCounteBayes(limma.object)
  
  DEqMS.results = outputResult(limma_DEQMS,coef_col = 1)
    
  limma_results_i <- DEqMS.results
    limma_results_i <- subset(limma_results_i, !is.na(logFC))
    limma_results_i$comparison <- comparison
    limma_results_i$mut <- mut
    colnames(limma_results_i)[c(8,9,10, 14, 15, 16)] <- 
      c("limma.t_beforeDEQMS", "pvalue.limma_beforeDEQMS" , "fdr.limma_beforeDEQMS", "t", "pvalue.limma", "fdr.limma")
    limma_results_i$fdr <- limma_results_i$fdr.limma
    limma_results_i$pvalue <- limma_results_i$pvalue.limma
    return(limma_results_i)

}

Annotation function

Annotate_Limma_Results <- function(limma_results){
  fdr_hit_threshold <- 0.001
  fdr_candidate_threshold = 0.01
  fc_hit_threshold <- 0.5
  fc_candidate_threshold <- 0.3
  
   limma_results$hit <-
    with(limma_results, ifelse(fdr <= fdr_hit_threshold & abs(logFC) >= fc_hit_threshold, TRUE, FALSE))
  limma_results$hit_annotation <- with(limma_results, 
                                       ifelse(fdr <= fdr_hit_threshold & abs(logFC) >= fc_hit_threshold, 
                                              "hit", 
                                              ifelse(fdr <= fdr_candidate_threshold & abs(logFC) >= fc_candidate_threshold,
                                                     "candidate", "no hit")))
  limma_results$hit_annotation <- factor(limma_results$hit_annotation, ordered = TRUE, levels = c("hit", "candidate", "no hit"))
  return(limma_results)
}

Rename deletions and gains functions

change_chr_abber_brackets <- function(levels_alt){
levels_alt %>% 
  str_replace(., "del", "del(") %>% str_replace(., "gain", "gain(") %>%
  str_replace(., "q", ")(q") %>% str_replace(., "p", ")(p") %>% 
  str_replace(., "11\\)\\(q", "11)(q22.3)") %>% 
  str_replace(., "p13", "p13)") %>% str_replace(., "q14", "q14)") %>%
  str_replace(., "q24", "q24)")
}

All genetic alterations limma

Conduct limma

limma_results <- NULL

mut_to_limma <- colnames(proteomics_tbl_meta_biomart)[-(1:12)][c(-15)]

for(i in 1:length(mut_to_limma)){
  m <- mut_to_limma[i]
  print(m)
  limma_results <- bind_rows(limma_results, 
                             Calculate_Limma(assay_data = assay(multiomics_MAE[prot_few_nas ,pat_overlap_prot_RNA ,"proteomics"]), mut = m))
}
## [1] "chrom_abber_del11q"
## [1] "chrom_abber_del13q14"
## [1] "chrom_abber_del17p13"
## [1] "chrom_abber_gain8q24"
## [1] "chrom_abber_trisomy12"
## [1] "SNPs_ATM"
## [1] "SNPs_BIRC3"
## [1] "SNPs_EGR2"
## [1] "SNPs_NOTCH1"
## [1] "SNPs_POT1"
## [1] "SNPs_SF3B1"
## [1] "SNPs_TP53"
## [1] "SNPs_XPO1"
## [1] "health_record_bin_IGHV_mutated"
## [1] "health_record_bin_treated"
limma_results$mut %>% unique
##  [1] "chrom_abber_del11q"             "chrom_abber_del13q14"          
##  [3] "chrom_abber_del17p13"           "chrom_abber_gain8q24"          
##  [5] "chrom_abber_trisomy12"          "SNPs_ATM"                      
##  [7] "SNPs_BIRC3"                     "SNPs_EGR2"                     
##  [9] "SNPs_NOTCH1"                    "SNPs_POT1"                     
## [11] "SNPs_SF3B1"                     "SNPs_TP53"                     
## [13] "SNPs_XPO1"                      "health_record_bin_IGHV_mutated"
## [15] "health_record_bin_treated"

Plot limma results

ggplot(data = limma_results) +
  geom_histogram(aes(pvalue.limma, alpha = 0.5), bins = 40) +
  guides(alpha = FALSE) +
  xlab("p-value") +
  facet_wrap( ~ mut +comparison, scale = "free_y") +
  coord_cartesian(xlim = c(0, 1)) +
  pp_sra + ggtitle("p-value histograms")

Limma Annotation of hits

Annotate

FDR 0.1%
limma_results <- Annotate_Limma_Results(limma_results)

with(limma_results, table(mut, hit_annotation))
##                                 hit_annotation
## mut                               hit candidate no hit
##   chrom_abber_del11q                0         6   7305
##   chrom_abber_del13q14              0         1   7310
##   chrom_abber_del17p13              0        10   7301
##   chrom_abber_gain8q24              1         0   7310
##   chrom_abber_trisomy12            67       221   7023
##   health_record_bin_IGHV_mutated   16        73   7222
##   health_record_bin_treated         2        36   7273
##   SNPs_ATM                          0         0   7311
##   SNPs_BIRC3                        0         4   7307
##   SNPs_EGR2                         4        10   7297
##   SNPs_NOTCH1                       0         0   7311
##   SNPs_POT1                         0         0   7311
##   SNPs_SF3B1                       29        53   7229
##   SNPs_TP53                         2        17   7292
##   SNPs_XPO1                         0         2   7309
message("Number of up and downregulated hits")
## Number of up and downregulated hits
with(limma_results %>% filter(hit==TRUE), table(mut, sign(logFC)))
##                                 
## mut                              -1  1
##   chrom_abber_gain8q24            1  0
##   chrom_abber_trisomy12          13 54
##   health_record_bin_IGHV_mutated  5 11
##   health_record_bin_treated       2  0
##   SNPs_EGR2                       0  4
##   SNPs_SF3B1                     26  3
##   SNPs_TP53                       0  2
dif_proteins_P_plot <- with(limma_results %>% mutate(dir=sign(logFC)), table("mut"=paste0(mut,"dir", dir), hit)) %>% as_tibble() %>% 
  separate(mut, into = c("mut", "dir"), sep = "dir") %>%
  filter(mut!="health_record_bin_treated") %>%
  mutate(dir=as.numeric(dir)) %>%
  mutate(n=n*dir) %>%
  mutate(mut=gsub("chrom_abber", "CNVs", mut), 
         mut=gsub("health_record_bin", "IGHV", mut), 
         mut= gsub("SNPs", "SNVs", mut) ) %>% 
  separate(mut, into = c("Type", "mut"), sep="_", extra = "merge" ) %>%
  mutate(mut = change_chr_abber_brackets(mut) ) %>%
  filter(hit==TRUE) %>%
  arrange(desc(n)) %>%
  mutate(mut = as.factor(mut)) %>%
  mutate(mut = factor(mut, levels = unique(.$mut))) %>%
  mutate(dir = if_else(dir == 1, "up", if_else(dir == (-1), "down", "NA" ))) %>%
  ggplot(aes(mut, n)) +
  geom_col(aes(fill=as.character(dir))) +
  ylab("Nr. differentially abundant proteins") +
  facet_grid(~Type, scales = "free_x", space="free") +
  theme_bw() + 
  theme(panel.grid = element_blank(), axis.text.x = element_text(angle = 90), axis.title.x = element_blank(), 
        strip.background = element_rect(fill = "white"), strip.text = element_text(face = "bold")) +
  scale_fill_manual(values = c("#0571b0", "#ca0020"), drop=FALSE) +
  geom_hline(yintercept = 0, color="darkgray")
  

dif_proteins_P_plot + geom_text(aes(mut, y=(-(-15-(sign(n)*45))), label=n ))

Plot Bargraph FDR 5%
dif_proteins_P_FDR5_plot <- 
  with(limma_results %>% 
         mutate(dir=sign(logFC), 
                hit = if_else(fdr <= 0.05 & 
                                abs(logFC) >= 0.5, TRUE, 
                              FALSE )), 
       table("mut"=paste0(mut,"dir", dir), hit)) %>% 
  as_tibble() %>% 
  separate(mut, into = c("mut", "dir"), sep = "dir") %>%
  filter(mut!="health_record_bin_treated") %>%
  mutate(dir=as.numeric(dir)) %>%
  mutate(n=n*dir) %>%
  mutate(mut=gsub("chrom_abber", "CNVs", mut), 
         mut=gsub("health_record_bin", "IGHV", mut), 
         mut= gsub("SNPs", "SNVs", mut) ) %>% 
  separate(mut, into = c("Type", "mut"), sep="_", extra = "merge" ) %>%
  mutate(mut = change_chr_abber_brackets(mut) ) %>%
  filter(hit==TRUE) %>%
  arrange(desc(n)) %>%
  mutate(mut = as.factor(mut)) %>%
  mutate(mut = factor(mut, levels = unique(.$mut))) %>%
  mutate(dir = if_else(dir == 1, "up", if_else(dir == (-1), "down", "NA" ))) %>%
  ggplot(aes(mut, n)) +
  geom_col(aes(fill=dir)) +
  ylab("Nr. differentially abundant proteins") +
  facet_grid(~Type, scales = "free_x", space="free") +
  theme_bw() + 
  theme(panel.grid = element_blank(), axis.text.x = element_text(angle = 90), axis.title.x = element_blank(), 
        strip.background = element_rect(fill = "white"), strip.text = element_text(face = "bold")) +
  scale_fill_manual(values = c("#0571b0", "#ca0020"), drop=FALSE) +
  geom_hline(yintercept = 0, color="darkgray")

Plot limma hits in volcano plot

All conditions

limma_protein_all_volcano_plots <- 
  limma_results %>%
  mutate(mut=gsub( "chrom_abber_", "", mut),
         mut=gsub( "SNPs_", "", mut),
         mut=gsub( "health_record_bin_", "", mut),
         FDR = if_else(fdr > 0.5, "no hit", 
                       if_else(fdr < 0.001, "FDR 0.1%", "FDR 5%"))) %>%
  mutate(mut = change_chr_abber_brackets(mut) ) %>%
  ggplot( aes(logFC, -log10(pvalue), colour = FDR)) +
    geom_vline(aes(xintercept = 0)) +
    geom_point(alpha=0.5) +
    facet_wrap( ~ mut , ncol = 4) +
    xlab("log2(fold change)") +
    scale_color_manual(values=c( "#ca0020", "#0571b0",  "grey40")) +
    pp_sra + 
    theme(legend.position = "top", legend.title = element_blank()) 

limma_protein_all_volcano_plots +   
  geom_text(aes(label = rowname), 
              data = subset( (limma_results %>%
                                mutate(mut=gsub( "chrom_abber_", "", mut),
         mut=gsub( "SNPs_", "", mut),
         mut=gsub( "health_record_bin_", "", mut)) %>%
             mutate(mut = change_chr_abber_brackets(mut) )),
                              hit_annotation == "hit"), 
              vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE, 
         color= "#ca0020") 

tmp <- limma_results %>% filter(hit==TRUE) %>% 
  mutate(direction=sign(logFC)) %>%
  group_by(mut) %>% arrange(fdr) %>% 
  dplyr::select(rowname, direction)
## Adding missing grouping variables: `mut`
message("Upregulated hits")
## Upregulated hits
sapply(group_nest(tmp)$mut, function(m){
  tmp %>% filter(mut==m, direction==1) %>% .$rowname
})
## $chrom_abber_gain8q24
## character(0)
## 
## $chrom_abber_trisomy12
##  [1] "EEA1"     "GOLGA3"   "DERA"     "PTPN11"   "CAMKK2"   "TIGAR"   
##  [7] "ITPRIP"   "DENR"     "PTPN6"    "LDHB"     "IRAK4"    "HIP1R"   
## [13] "PYCARD"   "TSFM"     "NUDT1"    "PRICKLE1" "PNP"      "MLX"     
## [19] "SAMHD1"   "GNB4"     "SLC2A6"   "SHMT2"    "RILPL2"   "CORO1C"  
## [25] "TWF1"     "RHOF"     "IKBIP"    "SPAG1"    "CKLF"     "PPP2R5B" 
## [31] "NT5E"     "BCAT1"    "CLIP2"    "ITGAL"    "UBE2H"    "ABCD2"   
## [37] "NLRC4"    "SCPEP1"   "MLXIP"    "BCL7A"    "TTC39C"   "FES"     
## [43] "CYFIP1"   "CNOT8"    "LDB1"     "TMEM19"   "ITGB2"    "DPYSL2"  
## [49] "TMEM231"  "RAB29"    "INF2"     "RNF135"   "BIN2"     "ITGAX"   
## 
## $health_record_bin_IGHV_mutated
##  [1] "FTH1"    "FTL"     "USP6NL"  "ZBTB20"  "TBC1D2B" "ABI3"    "SLAMF1" 
##  [8] "FCRL1"   "UBE2E2"  "FCRL2"   "ADD3"   
## 
## $health_record_bin_treated
## character(0)
## 
## $SNPs_EGR2
## [1] "SLC2A1" "ACTN1"  "EPPK1"  "ZNF446"
## 
## $SNPs_SF3B1
## [1] "SUGP1"  "ATAD3B" "EYA3"  
## 
## $SNPs_TP53
## [1] "TP53"  "FUT11"
message("Downregulated hits")
## Downregulated hits
sapply(group_nest(tmp)$mut, function(m){
  tmp %>% filter(mut==m, direction==-1) %>% .$rowname
})
## $chrom_abber_gain8q24
## [1] "MCPH1"
## 
## $chrom_abber_trisomy12
##  [1] "PELI1"    "TGFBR2"   "TCEAL3"   "ACYP1"    "CRAT"     "TCF4"    
##  [7] "SELENOM"  "TLE4"     "CAMK2D"   "SAMSN1"   "TCEAL4"   "CTH"     
## [13] "SLC25A23"
## 
## $health_record_bin_IGHV_mutated
## [1] "CPT1A"    "FAM114A1" "ZAP70"    "SERPINB6" "SEPT10"  
## 
## $health_record_bin_treated
## [1] "MTSS1" "ICAM2"
## 
## $SNPs_EGR2
## character(0)
## 
## $SNPs_SF3B1
##  [1] "TPP2"    "MAP3K7"  "PPP2R5A" "WASHC5"  "WASHC4"  "VPS8"    "IQGAP1" 
##  [8] "WASHC1"  "NAA16"   "DPH5"    "TTI1"    "CCDC88B" "SEPSECS" "SEPT6"  
## [15] "TTI2"    "WASHC3"  "ABCB7"   "DDX58"   "DIP2A"   "IDUA"    "RECQL"  
## [22] "UQCC1"   "JMY"     "SUN1"    "MROH1"   "SEPT2"  
## 
## $SNPs_TP53
## character(0)

Trisomy 12 colored by chromosome

limma_results %>% filter(mut=="chrom_abber_trisomy12") %>%
  mutate("Affected_region" = if_else( chromosome_name=="12", "TRUE", 
                                     if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>% 
  mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
  mutate(logFC= if_else(logFC > log2(5), log2(5.2), 
                                    if_else(logFC<(-log2(5)), -log2(5.2), logFC) ) ) %>%
ggplot(aes(logFC, -log10(pvalue)))  +
  geom_vline(aes(xintercept = 0)) +
  geom_point(alpha=0.8, aes(colour = Affected_region)) +
  geom_text(aes(label = rowname, color=chromosome_name=="12"), 
            data = subset(limma_results %>% filter(mut=="chrom_abber_trisomy12"), hit_annotation == "hit"), 
            vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE) +
  xlab("log2(fold change)") +
  scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
  pp_sra+
  guides(color=FALSE)+
  coord_cartesian(xlim=c(-log2(5), log2(5)), ylim=c(0, -log10(min(limma_results$pvalue)))) +
  ggtitle("trisomy12")+
  theme( plot.title = element_text(size = 20)) 

tmp <- (limma_results %>% 
              filter(mut=="chrom_abber_trisomy12", chromosome_name=="12") %>%
  .$logFC>0)  %>% table %>% as_tibble() 
colnames(tmp) <- c("fc", "n")
tmp <- tmp %>% arrange(desc(`fc`)) 
tmp <- tmp %>% mutate(label=paste( round(n/sum(tmp$n)*100, 1), "%" ), cums =cumsum(tmp$n) ) 
tmp %>%
  mutate("fc"=as.factor(`fc`)) %>% 
  ggplot(aes(x="", fill=`fc`,y= n)) +
  geom_col() +
  ggtitle("Proteins on chromosome 12 with positive logFC in trisomy 12") + 
  coord_polar("y", start=0) +
  pp_sra +
  theme(axis.title = element_blank(), axis.ticks = element_blank())+
  scale_fill_manual(values = c( "grey", "#0571b0"), drop=FALSE)+
  annotate(geom = "text", y = tmp$cums-(tmp$n/2), x = 1, label = tmp$label)

tmp <- (limma_results %>%  
  filter(mut=="chrom_abber_trisomy12", hit==TRUE) %>%
  .$chromosome_name == "12") %>%
  table %>% as_tibble()
colnames(tmp) <- c("chr12", "n")
tmp <- tmp %>% arrange(desc(`chr12`)) 
tmp <- tmp %>% mutate(label=paste( round(n/sum(tmp$n)*100, 1), "%" ), cums =cumsum(tmp$n) ) 
piechart_hits_prot_chr_tris12 <-  tmp %>%
  mutate("chr12"=as.factor(`chr12`)) %>% 
  ggplot(aes(x="", fill=`chr12`,y= n)) +
  geom_col() +
  ggtitle("Chromosome location of hits in trisomy 12") + 
  coord_polar("y", start=0) +
  pp_sra +
  theme(axis.title = element_blank(), axis.ticks = element_blank())+
  scale_fill_manual(values = c( "grey", "#0571b0"), drop=FALSE)
piechart_hits_prot_chr_tris12 +
  annotate(geom = "text", y = tmp$cums-(tmp$n/2), x = 1, label = tmp$label)

Gene set enrichment analysis trisomy12 save data needed

####### Input of all Values
tris_pats <- colData(multiomics_MAE) %>% as_tibble() %>% filter(!is.na(trisomy12)) %>% .$patient_ID

longFormat((multiomics_MAE[prot_few_nas , tris_pats ,"proteomics"])) %>% 
  as_tibble() %>% 
  dplyr::select("NAME"=rowname, "DESCRIPTION"=assay , primary, value) %>%
  unique() %>%
  spread(primary, value) #%>%
  #write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190726_GSEA_data_long_gradient_trisomy12.txt", quote = FALSE, row.names = FALSE, sep = "\t", na="")

tmp_pats <- longFormat((multiomics_MAE[prot_few_nas , tris_pats ,"proteomics"])) %>% 
  as_tibble() %>% 
  dplyr::select("NAME"=rowname, primary, value) %>%
  unique() %>%
  spread(primary, value)  %>%
  dplyr::select(-1) %>%
  colnames() %>%
  enframe() 
tmp_pats <- tmp_pats %>%
  left_join(., 
            (proteomics_tbl_meta_biomart %>% dplyr::select(primary, trisomy12) %>% unique) , 
            by=c("value"="primary") )
tmp_pats %>%
  dplyr::select(trisomy12) #%>% t() %>%
  #write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190726_GSEA_phenotypelabels_long_gradient_trisomy12.cls", quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " ", na="")
dim(tmp_pats)
sum(tmp_pats$trisomy12)

######### Only input proteins which are not on chromosome 12
prot_no_chr12 <- limma_results %>%
  filter(mut=="chrom_abber_trisomy12", chromosome_name!="chr12") %>% .$rowname %>% unique

proteomics_tbl_meta_biomart %>% 
  filter(!is.na(trisomy12), rowname %in% prot_no_chr12) %>%
  dplyr::select("NAME"=rowname,"DESCRIPTION"=assay, primary, value) %>%
  unique() %>%
  spread(primary, value) #%>%
  #write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190726_GSEA_data_long_gradient_trisomy12_no_chr12.txt", quote = FALSE, row.names = FALSE, sep = "\t", na="")

Comment: These lists were edited later to fit the GSEA format

del13q14 colored by chromosome

limma_results %>% filter(mut=="chrom_abber_del13q14") %>%
  mutate("Affected_region" = if_else( (chromosome_name=="13" & start_position>47000000 & start_position<51000000 ), "TRUE", 
                                     if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>% 
  mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
  mutate(logFC= if_else(logFC > log2(5), log2(5.2), 
                                    if_else(logFC<(-log2(5)), -log(5.2), logFC) ) ) %>%
ggplot(aes(logFC, -log10(pvalue)))  +
  geom_vline(aes(xintercept = 0)) +
  geom_point(alpha=0.8, aes(colour = Affected_region)) +
  geom_text(aes(label = rowname, color=(chromosome_name=="13" & start_position>47000000 & start_position<51000000)), 
            data = subset(limma_results %>% filter(mut=="chrom_abber_del13q14"), hit_annotation == "hit"), 
            vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE) +
  xlab("log2(fold change)") +
  scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
  pp_sra+
  guides(color=FALSE)+
  coord_cartesian(xlim=c(-log2(5), log2(5)), ylim=c(0, -log10(min(limma_results$pvalue))))+
  ggtitle("del13q14")+
  theme( plot.title = element_text(size = 20)) 

del11q colored by chromosome

limma_results %>% filter(mut=="chrom_abber_del11q") %>%
  mutate("Affected_region" = if_else( (chromosome_name=="11" & start_position>97400000 & start_position<110600000), "TRUE", 
                                     if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>% 
  mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
  mutate(logFC= if_else(logFC > log2(5), log2(5.2), 
                                    if_else(logFC<(-log2(5)), -log2(5.2), logFC) ) ) %>%
ggplot(aes(logFC, -log10(pvalue)))  +
  geom_vline(aes(xintercept = 0)) +
  geom_point(alpha=0.8, aes(colour = Affected_region)) +
  geom_text(aes(label = rowname, color=(chromosome_name=="11" & start_position>97400000 & start_position<110600000)), 
            data = subset(limma_results %>% filter(mut=="chrom_abber_del11q"), hit_annotation == "hit"), 
            vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE) +
  xlab("log2(fold change)") +
  scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
  pp_sra+
  guides(color=FALSE)+
  coord_cartesian(xlim=c(-log2(5), log2(5)), ylim=c(0, -log10(min(limma_results$pvalue)))) +
  ggtitle("del11q")+
  theme(plot.title = element_text(size = 20)) 

del17p13 colored by chromosome

limma_results %>% filter(mut=="chrom_abber_del17p13") %>%
  mutate("Affected_region" = if_else( (chromosome_name=="17" & mean_position<10800000), "TRUE", 
                                     if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>% 
  mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
  mutate(logFC= if_else(logFC > log2(5), log2(5.2), 
                                    if_else(logFC<(-log2(5)), -log2(5.2), logFC) ) ) %>%
ggplot(aes(logFC, -log10(pvalue)))  +
  geom_vline(aes(xintercept = 0)) +
  geom_point(alpha=0.8, aes(colour = Affected_region)) +
  geom_text(aes(label = rowname, color=(chromosome_name=="17" & mean_position<10800000)), 
            data = subset(limma_results %>% filter(mut=="chrom_abber_del17p13"), hit_annotation == "hit"), 
            vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE) +
  xlab("log2(fold change)") +
  scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
  pp_sra+
  guides(color=FALSE)+
  coord_cartesian(xlim=c(-log2(5), log2(5)), ylim=c(0, -log10(min(limma_results$pvalue)))) +
  ggtitle("del17p13")+
  theme(plot.title = element_text(size = 20)) 

XPO1

XPO_volcano_plot <- limma_results %>% filter(mut=="SNPs_XPO1") %>%
  mutate("Affected_region" = if_else( rowname=="XPO1", "TRUE", 
                                     if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>% 
  mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
ggplot(aes(logFC, -log10(pvalue)))  +
  geom_vline(aes(xintercept = 0)) +
  geom_point(alpha=0.8, aes(colour = Affected_region)) +
  geom_text(aes(label = rowname), 
            data = subset(limma_results %>% filter(mut=="SNPs_XPO1", rowname!="XPO1"), hit_annotation == "hit"), 
            vjust = 0, size = 3, nudge_y = (-1), nudge_x = -0.3,
            check_overlap = FALSE, color="#0571b0") +
   geom_text(aes(label = rowname, color=rowname=="XPO1"), 
            data = subset(limma_results %>% filter(mut=="SNPs_XPO1", rowname=="XPO1")), 
            vjust = 0, size = 3,  nudge_x = 0.2, nudge_y = (-0.5), 
            check_overlap = FALSE) +
  xlab("log2(fold change)") +
  scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
  pp_sra+
  coord_cartesian(xlim=c(-log2(5), log2(5)) ) 

XPO_volcano_plot + ggtitle("XPO1")+
  theme( plot.title = element_text(size = 20)) +
  guides(color=FALSE)

TP53

TP53_volcano_plot <- limma_results %>% filter(mut=="SNPs_TP53") %>%
  mutate( rowname = if_else(rowname == "TP53", "P53", rowname) ) %>%
  mutate("Affected_region" = if_else( rowname=="P53", "TRUE", 
                                     if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>% 
  mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
ggplot(aes(logFC, -log10(pvalue)))  +
  geom_vline(aes(xintercept = 0)) +
  geom_point(alpha=0.8, aes(colour = Affected_region)) +
  geom_text(aes(label = rowname), 
            data = subset(limma_results %>% 
                            mutate( rowname = if_else(rowname == "TP53", "P53", rowname) ) %>%
                            filter(mut=="SNPs_TP53", rowname!="P53"), hit_annotation == "hit"), 
            vjust = 0, size = 3,  nudge_y = -0.3, nudge_x = -0.2, check_overlap = FALSE, color="#0571b0") +
   geom_text(aes(label = rowname, color=rowname=="P53"), 
            data = subset(limma_results %>% 
                            mutate( rowname = if_else(rowname == "TP53", "P53", rowname) ) %>%
                            filter(mut=="SNPs_TP53", rowname=="P53")), 
            vjust = 0, size = 3, nudge_y = -0.3, nudge_x = -0.2, check_overlap = FALSE) +
  xlab("log2(fold change)") +
  scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
  pp_sra+
  coord_cartesian(xlim=c(-log2(2.2), log2(2.2)) ) 

TP53_volcano_plot + ggtitle("TP53")+
  theme( plot.title = element_text(size = 20)) +
  guides(color=FALSE)

ATM

ATM_volcano_plot <- limma_results %>% filter(mut=="SNPs_ATM") %>%
  mutate("Affected_region" = if_else( rowname=="ATM", "TRUE", 
                                     if_else(hit_annotation == "hit", "FALSE", "not altered"))) %>% 
  mutate("Affected_region"= factor(Affected_region, levels=c("FALSE","TRUE", "not altered"))) %>%
ggplot(aes(logFC, -log10(pvalue)))  +
  geom_vline(aes(xintercept = 0)) +
  geom_point(alpha=0.8, aes(colour = Affected_region)) +
  geom_text(aes(label = rowname), 
            data = subset(limma_results %>% filter(mut=="SNPs_ATM", rowname!="ATM"), hit_annotation == "hit"), 
            vjust = 0, size = 3,  nudge_y = -0.3, nudge_x = -0.2, check_overlap = FALSE, color="#0571b0") +
   geom_text(aes(label = rowname, color=rowname=="ATM"), 
            data = subset(limma_results %>% filter(mut=="SNPs_ATM", rowname=="ATM")), 
            vjust = 0, size = 3, nudge_y = -0.3, nudge_x = -0.2, check_overlap = FALSE) +
  xlab("log2(fold change)") +
  scale_color_manual(values = c("#0571b0", "orange1", "grey"), drop=FALSE) +
  pp_sra+
  coord_cartesian(xlim=c(-log2(2.2), log2(2.2)) ) 

ATM_volcano_plot + ggtitle("ATM")+
  theme( plot.title = element_text(size = 20)) +
  guides(color=FALSE)

Limma with subsets of patients

Consensus clusters groups 5 vs. all others

selpats <- colData(multiomics_MAE) %>% as_tibble() %>% filter(!is.na(PG)) %>% .$patient_ID
proteomics_tbl_meta_biomart_tmp <- proteomics_tbl_meta_biomart
proteomics_tbl_meta_biomart <- left_join(proteomics_tbl_meta_biomart, enframe(CCP_group5==5, name = "primary", value="PG"), by="primary" )
limma_results_CC5_5_all <-  Calculate_Limma(assay_data = assay(multiomics_MAE[prot_few_nas , selpats ,"proteomics"]), mut = "PG")
## ExperimentList contains data.frame or DataFrame,
##   potential for errors with mixed data types
## harmonizing input:
##   removing 457 sampleMap rows not in names(experiments)
proteomics_tbl_meta_biomart <- proteomics_tbl_meta_biomart_tmp 

ggplot(data = limma_results_CC5_5_all) +
  geom_histogram(aes(pvalue.limma, alpha = 0.5), bins = 40) +
  guides(alpha = FALSE) +
  xlab("p-value") +
  facet_wrap( ~ mut , scale = "free_y") +
  coord_cartesian(xlim = c(0, 1)) +
  pp_sra +
  ggtitle("PG groups")

limma_results_CC5_5_all <- Annotate_Limma_Results(limma_results_CC5_5_all)

limma_results_CC5_5_all %>% 
  mutate("Affected_region" =  if_else(hit_annotation == "hit",  "hit", "not altered")) %>% 
  mutate("Affected_region"= factor(Affected_region, levels=c("hit", "not altered"))) %>%
ggplot(aes(logFC, -log10(pvalue)))  +
  geom_vline(aes(xintercept = 0)) +
  geom_point(alpha=0.8, aes(colour = Affected_region)) +
  geom_text(aes(label = rowname), color="#0571b0", 
            data = subset(limma_results_CC5_5_all %>% filter( hit_annotation == "hit"), 
            vjust = 0, nudge_y = 0.1, size = 3, check_overlap = FALSE)) +
  facet_wrap( ~ mut, ncol = 1) +
  xlab("log2(fold change)") +
  scale_color_manual(values = c("#0571b0",  "grey"), drop=FALSE) +
  pp_sra+
  guides(color=guide_legend(title = "PG groups"))+
  ggtitle("Consensus cluster plus group 5 against all others") 

limma_results_CC5_5_all %>% filter(hit==TRUE) %>% .$rowname
##  [1] "VAV2"     "AFP"      "ZNRF2"    "SERPINC1" "BIN2"     "TF"      
##  [7] "CLEC3B"   "FBLN1"    "ARSA"     "NRAS"     "PPP3R1"   "ITIH2"   
## [13] "PTPN6"    "A2M"      "MAPK15"   "TST"      "ISYNA1"   "TTC9"    
## [19] "SLC38A2"  "MAPKAPK5" "KYNU"     "AKT3"     "SLC22A18" "SERPINF2"
gene.data <- limma_results_CC5_5_all %>% 
  filter( rowname %in% BCR_genes$symbol) %>% 
  dplyr::select(logFC) %>% unlist 
names(gene.data) <- limma_results_CC5_5_all %>% 
  filter( rowname %in% BCR_genes$symbol) %>% 
  dplyr::select(rowname) %>% unlist

oldwd <- getwd()
setwd("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/CLL_Proteomics_final/Figures/pathview/")
pathview::pathview(pathway.id = "04662", gene.data = gene.data, species = "hsa", gene.idtype = "SYMBOL", 
         bins = 10, 
         limit = list(gene=c( -max(abs(gene.data), na.rm = TRUE) , max(abs(gene.data), na.rm = TRUE))), 
         kegg.native = TRUE, out.suffix = "PG5")
pathview::pathview(pathway.id = "04662", gene.data = gene.data, species = "hsa", gene.idtype = "SYMBOL", 
         bins = 10, 
         limit = list(gene=c( -max(abs(gene.data), na.rm = TRUE) , max(abs(gene.data), na.rm = TRUE))), 
         kegg.native = FALSE, out.suffix = "PG5")
setwd(oldwd)

Gene set enrichment analysis consensus cluster plus groups 5 vs. all

####### Input of all Values
longFormat((multiomics_MAE[prot_few_nas , selpats ,"proteomics"])) %>% 
  as_tibble() %>% 
  dplyr::select("NAME"=rowname, "DESCRIPTION"=assay , primary, value) %>%
  unique() %>%
  spread(primary, value) #%>%
  #write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190927_GSEA_data_long_gradient_CCP_5vsall.txt", quote = FALSE, row.names = FALSE, sep = "\t", na="")

tmp_pats <- longFormat((multiomics_MAE[prot_few_nas , selpats ,"proteomics"])) %>% 
  as_tibble() %>% 
  dplyr::select("NAME"=rowname, primary, value) %>%
  unique() %>%
  spread(primary, value)  %>%
  dplyr::select(-1) %>%
  colnames() %>%
  enframe() 
tmp_pats <- tmp_pats %>%
  left_join(., 
            enframe(multiomics_MAE$PG, value="PG", name="primary" ) %>% mutate("PG"=PG==5) , 
            by=c("value"="primary") )
tmp_pats %>%
  dplyr::select(PG) #%>% t() %>%
  #write.table(file = "/Users/sophierabe/Desktop/PhD/Labor/Proteomics/CLL/GSEA_CLL_Proteomics/190927_GSEA_phenotypelabels_long_gradient__CCP_5vsall.cls", quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " ", na="")
dim(tmp_pats)
sum(tmp_pats$PG==TRUE)

Save important plots

save(dif_proteins_P_plot, XPO_volcano_plot, TP53_volcano_plot, ATM_volcano_plot, piechart_hits_prot_chr_tris12, 
     dif_proteins_P_FDR5_plot, limma_protein_all_volcano_plots,
     file = "RData_plots/CLL_Proteomics_Limma_Plots.RData")

Save data needed for further analysis

save(limma_results,
     file = "data/CLL_Proteomics_LimmaProteomics.RData")

Session Info

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] DEqMS_1.6.0                 gplots_3.1.1               
##  [3] MultiAssayExperiment_1.14.0 apeglm_1.10.0              
##  [5] limma_3.44.3                fdrtool_1.2.16             
##  [7] vsn_3.56.0                  forcats_0.5.1              
##  [9] stringr_1.4.0               dplyr_1.0.3                
## [11] purrr_0.3.4                 readr_1.4.0                
## [13] tidyr_1.1.2                 tibble_3.0.6               
## [15] tidyverse_1.3.0             DESeq2_1.28.1              
## [17] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
## [19] matrixStats_0.58.0          Biobase_2.48.0             
## [21] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [23] IRanges_2.22.2              S4Vectors_0.26.1           
## [25] BiocGenerics_0.34.0         ggrepel_0.9.1              
## [27] ggpubr_0.4.0                Hmisc_4.4-2                
## [29] ggplot2_3.3.3               Formula_1.2-4              
## [31] survival_3.2-7              lattice_0.20-41            
## [33] Matrix_1.3-2                pheatmap_1.0.12            
## [35] gtools_3.8.2                plyr_1.8.6                 
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.2.1        splines_4.0.2         
##   [4] BiocParallel_1.22.0    digest_0.6.27          htmltools_0.5.1.1     
##   [7] magrittr_2.0.1         checkmate_2.0.0        memoise_2.0.0         
##  [10] cluster_2.1.0          openxlsx_4.2.3         annotate_1.66.0       
##  [13] modelr_0.1.8           bdsmatrix_1.3-4        jpeg_0.1-8.1          
##  [16] colorspace_2.0-0       blob_1.2.1             rvest_0.3.6           
##  [19] haven_2.3.1            xfun_0.20              crayon_1.4.0          
##  [22] RCurl_1.98-1.2         jsonlite_1.7.2         genefilter_1.70.0     
##  [25] glue_1.4.2             gtable_0.3.0           zlibbioc_1.34.0       
##  [28] XVector_0.28.0         car_3.0-10             abind_1.4-5           
##  [31] scales_1.1.1           mvtnorm_1.1-1          DBI_1.1.1             
##  [34] rstatix_0.6.0          Rcpp_1.0.6             xtable_1.8-4          
##  [37] emdbook_1.3.12         htmlTable_2.1.0        foreign_0.8-81        
##  [40] bit_4.0.4              preprocessCore_1.50.0  htmlwidgets_1.5.3     
##  [43] httr_1.4.2             RColorBrewer_1.1-2     ellipsis_0.3.1        
##  [46] farver_2.0.3           pkgconfig_2.0.3        XML_3.99-0.5          
##  [49] nnet_7.3-15            dbplyr_2.0.0           locfit_1.5-9.4        
##  [52] labeling_0.4.2         tidyselect_1.1.0       rlang_0.4.10          
##  [55] AnnotationDbi_1.50.3   munsell_0.5.0          cellranger_1.1.0      
##  [58] tools_4.0.2            cachem_1.0.1           cli_2.3.0             
##  [61] generics_0.1.0         RSQLite_2.2.3          broom_0.7.4           
##  [64] evaluate_0.14          fastmap_1.1.0          yaml_2.2.1            
##  [67] knitr_1.31             bit64_4.0.5            fs_1.5.0              
##  [70] zip_2.1.1              caTools_1.18.1         xml2_1.3.2            
##  [73] compiler_4.0.2         rstudioapi_0.13        curl_4.3              
##  [76] png_0.1-7              affyio_1.58.0          ggsignif_0.6.0        
##  [79] reprex_1.0.0           geneplotter_1.66.0     stringi_1.5.3         
##  [82] highr_0.8              vctrs_0.3.6            pillar_1.4.7          
##  [85] lifecycle_0.2.0        BiocManager_1.30.10    data.table_1.13.6     
##  [88] bitops_1.0-6           R6_2.5.0               latticeExtra_0.6-29   
##  [91] affy_1.66.0            KernSmooth_2.23-18     gridExtra_2.3         
##  [94] rio_0.5.16             MASS_7.3-53            assertthat_0.2.1      
##  [97] withr_2.4.1            GenomeInfoDbData_1.2.3 hms_1.0.0             
## [100] grid_4.0.2             rpart_4.1-15           coda_0.19-4           
## [103] rmarkdown_2.6          carData_3.0-4          bbmle_1.0.23.1        
## [106] numDeriv_2016.8-1.1    lubridate_1.7.9.2      base64enc_0.1-3